package me.qingji.zebra.parser.impl;

import java.util.LinkedHashSet;
import java.util.Set;
import java.util.logging.Logger;

import me.qingji.zebra.bean.Link;
import me.qingji.zebra.parser.ChapterParser;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;


/**
 * Base {@link ChapterParser} that extracts chapter links from an HTML page
 * using jsoup.
 *
 * <p>Subclasses supply two selectors: {@link #contentExpress()} locates the
 * container element(s) in the document, and {@link #charpterExpress()} locates
 * the anchor tags inside the first matched container. Only anchors whose
 * absolute {@code href} starts with {@link #acceptedUrlPrefix()} are kept.
 */
public abstract class AbstractChapterParser implements ChapterParser {
	private static final Logger LOGGER = Logger.getLogger(AbstractChapterParser.class.getName());

	/** URL prefix accepted by default; kept for backward compatibility. */
	private static final String DEFAULT_URL_PREFIX = "http://www.tianyabook.com/liao/";

	/**
	 * Parses {@code html} and collects the chapter links it contains.
	 *
	 * @param html    the HTML source of the page to parse
	 * @param baseUri the base URI handed to jsoup, used to resolve relative
	 *                {@code href} values to absolute URLs
	 * @return the links found, in document order; empty when the content
	 *         selector matches nothing
	 */
	public Set<Link> parse(String html, String baseUri) {
		Set<Link> links = new LinkedHashSet<Link>();

		Document document = Jsoup.parse(html, baseUri);
		Elements containers = document.select(contentExpress());

		// FIXME: jsoup returns duplicate elements for the content selector,
		// so only the first matched container is processed.
		Element container = containers.first();
		if (container == null) {
			return links;
		}

		String prefix = acceptedUrlPrefix();
		for (Element aTag : container.select(charpterExpress())) {
			// "abs:href" asks jsoup for the href resolved against the base URI.
			String url = aTag.attr("abs:href");
			if (!url.startsWith(prefix)) {
				continue;
			}

			String name = aTag.text();
			if (name.length() == 0) {
				// The link is still recorded below; this only flags the anomaly.
				LOGGER.warning("Chapter link has empty text; element: " + container
						+ ", aTag: " + aTag);
			}

			Link link = new Link();
			link.setName(name);
			link.setUrl(url);
			links.add(link);
		}
		return links;
	}

	/**
	 * Returns the prefix an anchor's absolute URL must start with to be
	 * accepted as a chapter link. Subclasses may override this to target a
	 * different site or path.
	 *
	 * @return a non-null URL prefix; an empty string accepts every URL
	 */
	protected String acceptedUrlPrefix() {
		return DEFAULT_URL_PREFIX;
	}

	/**
	 * @return the selector passed to {@code Document.select} to locate the
	 *         container element(s) holding the chapter links
	 */
	public abstract String contentExpress();

	/**
	 * @return the selector passed to {@code Element.select} on the first
	 *         container to locate the chapter anchor tags (the method name's
	 *         spelling is retained for compatibility with existing subclasses)
	 */
	public abstract String charpterExpress();

}
